# Structure of index construction 
# 1. For each time period t, construct sums by stratum (e.g. store x product group)	
# 2. Chain sums across t to get indices 

# List of options
# 1. Laspeyres/Lowe (default) 
# 2. Geometric Laspeyres 3. Tornqvist 4. Fix weights across time 5. Fix weights across space 6. County-UPC-quarter obs 7. One-stage construction

# Replace Price with unit_price to construct with "posted" prices instead of quantity-weighted average prices 

verbose.Print.Time <- function(verbose, msg=NA, prev.time=NA) {
    cur.time <- proc.time()[3]
    if (verbose) {
        if (is.na(prev.time)) { # no time to print w/o prev.time 
            cat(sprintf("%s", msg))
        } else {
            if (is.na(msg)) { # if no message, end of timing, just print time
                cat(sprintf("(%1.0fs), ", cur.time-prev.time))
            } else { # usual case, print time since last message and message
                cat(sprintf("(%1.0fs); %s ", cur.time-prev.time, msg))
            }
        }	
    }
    return(cur.time)
}

# Extract file names from repository, name them by product module/group 
get.file.locations <- function(sort.movement.size = FALSE, sort.reverse = FALSE,
                            repository, by_state = TRUE, state = c()) {
    if (by_state) {

    movement.files <-list.files(repository, pattern = sprintf('*_%d.RData', state),
                     recursive = TRUE, full.names = TRUE) 
    }

    else {

    movement.files <-list.files(repository, recursive = TRUE, full.names = TRUE) 
    }

    modules <- unique(gsub(".*/([0-9]{3,4})(_[0-9]{1,2})?.RData", "\\1",
                           movement.files))

    modules <- modules[order(modules, decreasing=sort.reverse)]

    movement = lapply(modules, function(x) {
                        # Remember to put / in front of %s or else matches 503 with 1503! 
                        movement.files[grep(sprintf("*/%s(_[0-9]{1,2})?.RData$", x),
                                          movement.files)]})

    names(movement) <- modules 


    if (sort.movement.size) {
        move.size <- sapply(movement, function(x) sum(file.info(x)$size))
        movement <- movement[order(move.size, decreasing=sort.reverse)]
    }
    return(movement)

}

PI_calc_06_07 <- function(dt, tvc) {
	# If input data is empty, setkey and return price index = 1 
    if (nrow(dt) == 0) {
        setkey(tvc, quarter)
        setkey(dt, quarter)
        tvc[, `:=` (P_t = ifelse(quarter == 1, 1L, NaN), P_gl_t = ifelse(quarter == 1, 1L, NaN), P_y_lm = 0, P_y_glm = 0, PI_end = 0, PI_start = 0, PI_gl_end = 0, PI_gl_start = 0, P_fix_t = ifelse(quarter == 1, 1L, NaN), P_y_fix_lm = 0 )]
        setkey(tvc, quarter)
    return(tvc)
    } else {    
	# If input data is non-empty, construct price indices as last period's index (1 in the case of quarter 2) * P_y_lm_{t} / P_y_lm_{t-1}
        for (i in 2:nrow(dt)) {
        dt[i,  `:=` (P_t = ifelse(quarter == 2, dt[i, P_y_lm]/dt[i-1, P_y_lm],
                    dt[i-1, P_t]*dt[i, P_y_lm]/dt[i-1, P_y_lm]), 
					P_gl_t = ifelse(quarter == 2, exp(dt[i, P_y_glm]-dt[i-1, P_y_glm]),
                    dt[i-1, P_gl_t]*(exp(dt[i, P_y_glm]-dt[i-1, P_y_glm]))),
					PI_start = 0, PI_end = 0, PI_gl_end = 0, PI_gl_start = 0,
					P_fix_t = dt[i-1, P_fix_t]*(dt[i, P_y_fix_lm]/ dt[i-1, P_y_fix_lm]) )]
    }
    setkey(dt, quarter)
    # dt[, c("P_y_lm","P_y_glm") := NULL]
	# Merge input data with sales 
    comb <- tvc[dt, nomatch= 0]
    setkey(comb, quarter)
    return(comb)
    }
}

# Last term is end year value, used as construction of first observation in each year 
PI_calc_08 <- function(dt, tvc, table, dt_24, dt_25, dt_gl_24, dt_gl_25) {
    if (nrow(dt) == 0) {
        setkey(tvc, quarter)
        setkey(table, quarter)
        tvc[, `:=` (P_t = NaN, P_gl_t = NaN, P_y_lm = 0, P_y_glm = 0, PI_end = 0, PI_start = 0, PI_gl_end = 0, PI_gl_start = 0, P_fix_t = NaN, P_y_fix_lm = 0, quarter = seq(9,12) )]
        h = list(table, tvc)
        stacked_h <- rbindlist(h, use.names=TRUE)
        setkey(stacked_h, quarter)

        return(stacked_h)
    } 
    else {
        setkey(dt, quarter)
        setkey(table, quarter)
        setkey(tvc, quarter)
         for (i in 1:nrow(dt)) {
            dt[i, ':=' (P_t = ifelse(quarter == 1, 
            table[8, P_t]*(dt_25/dt_24),
            dt[i-1, P_t] *(dt[i, P_y_lm]/ dt[i-1, P_y_lm])), 
			P_gl_t = ifelse(quarter == 1, 
            table[8, P_gl_t]*exp(dt_gl_25-dt_gl_24),
            dt[i-1, P_gl_t]*(exp(dt[i, P_y_glm]-dt[i-1, P_y_glm]))),
			PI_start = dt_25, PI_end = dt_24, PI_gl_end = dt_gl_25, PI_gl_start = dt_gl_24,
			P_fix_t = ifelse(quarter == 1, table[8, P_fix_t]*(dt[i, P_y_fix_lm]/ table[8, P_y_fix_lm]), dt[i-1, P_fix_t]*(dt[i, P_y_fix_lm]/ dt[i-1, P_y_fix_lm])) )]
    }
    # dt[, c("P_y_lm","P_y_glm") := NULL]

    setkey(dt, quarter)
    setkey(tvc, quarter)
    comb <- tvc[dt, nomatch = 0]
    comb[, quarter := seq(9,12)]
    l = list(table, comb)
    stacked <- rbindlist(l, use.names=TRUE)
    setkey(stacked, quarter)
    return(stacked)
    }
}

PI_calc_09 <- function(dt, tvc, table, dt_36, dt_37, dt_gl_36, dt_gl_37) {
    if (nrow(dt) == 0) {
        setkey(tvc, quarter)
        setkey(table, quarter)        
        tvc[, `:=` (P_t = NaN, P_gl_t = NaN, P_y_lm = 0, P_y_glm = 0, PI_end = 0, PI_start = 0, PI_gl_end = 0, PI_gl_start = 0, P_fix_t = NaN, P_y_fix_lm = 0, quarter = seq(13,16) )]
        h = list(table, tvc)
        stacked_h <- rbindlist(h, use.names=TRUE)
        setkey(stacked_h, quarter)

        return(stacked_h)
    } 
    else {
        setkey(dt, quarter)
        setkey(table, quarter)
        setkey(tvc, quarter)        
         for (i in 1:nrow(dt)) {
            dt[i, ':=' (P_t = ifelse(quarter == 1, 
            table[12, P_t]*(dt_37/dt_36),
            dt[i-1, P_t] *(dt[i, P_y_lm]/ dt[i-1, P_y_lm])), 
			P_gl_t = ifelse(quarter == 1, 
            table[12, P_gl_t]*exp(dt_gl_37-dt_gl_36),
            dt[i-1, P_gl_t] *(exp(dt[i, P_y_glm]-dt[i-1, P_y_glm]))),
			PI_start = dt_37, PI_end = dt_36, PI_gl_end = dt_gl_37, PI_gl_start = dt_gl_36,
			P_fix_t = ifelse(quarter == 1, table[12, P_fix_t]*(dt[i, P_y_fix_lm]/ table[12, P_y_fix_lm]), dt[i-1, P_fix_t]*(dt[i, P_y_fix_lm]/ dt[i-1, P_y_fix_lm])) )]
    }
    # dt[, c("P_y_lm","P_y_glm") := NULL]

    setkey(dt, quarter)
    setkey(tvc, quarter)
    comb <- tvc[dt, nomatch = 0]
    comb[, quarter := seq(13,16)]
    l = list(table, comb)
    stacked <- rbindlist(l, use.names=TRUE)
    setkey(stacked, quarter)
    return(stacked)
    }
}

PI_calc_10 <- function(dt, tvc, table, dt_48, dt_49, dt_gl_48, dt_gl_49) {
    if (nrow(dt) == 0) {
        setkey(tvc, quarter)
        setkey(table, quarter)          
        tvc[, `:=` (P_t = NaN, P_gl_t = NaN, P_y_lm = 0, P_y_glm = 0, PI_end = 0, PI_start = 0, PI_gl_end = 0, PI_gl_start = 0, P_fix_t = NaN, P_y_fix_lm = 0, quarter = seq(17,20) )]
        h = list(table, tvc)
        stacked_h <- rbindlist(h, use.names=TRUE)
        setkey(stacked_h, quarter)

        return(stacked_h)
    } 
    else {
        setkey(dt, quarter)
        setkey(table, quarter)
        setkey(tvc, quarter)        
         for (i in 1:nrow(dt)) {
            dt[i, ':=' (P_t = ifelse(quarter == 1, 
            table[16, P_t]*(dt_49/dt_48),
            dt[i-1, P_t] *(dt[i, P_y_lm]/ dt[i-1, P_y_lm])), 
			P_gl_t = ifelse(quarter == 1, 
            table[16, P_gl_t]*exp(dt_gl_49-dt_gl_48),
            dt[i-1, P_gl_t] *(exp(dt[i, P_y_glm]-dt[i-1, P_y_glm]))),
			PI_start = dt_49, PI_end = dt_48, PI_gl_end = dt_gl_49, PI_gl_start = dt_gl_48,
			P_fix_t = ifelse(quarter == 1, table[16, P_fix_t]*(dt[i, P_y_fix_lm]/ table[16, P_y_fix_lm]), dt[i-1, P_fix_t]*(dt[i, P_y_fix_lm]/ dt[i-1, P_y_fix_lm])) )]
    }
    # dt[, c("P_y_lm","P_y_glm") := NULL]

    setkey(dt, quarter)
    setkey(tvc, quarter)
    comb <- tvc[dt, nomatch = 0]
    comb[, quarter := seq(17,20)]
    l = list(table, comb)
    stacked <- rbindlist(l, use.names=TRUE)
    setkey(stacked, quarter)

    return(stacked)
    }
}

PI_calc_11 <- function(dt, tvc, table, dt_60, dt_61, dt_gl_60, dt_gl_61) {
    if (nrow(dt) == 0) {
        setkey(tvc, quarter)
        setkey(table, quarter)          
        tvc[, `:=` (P_t = NaN, P_gl_t = NaN, P_y_lm = 0, P_y_glm = 0, PI_end = 0, PI_start = 0, PI_gl_end = 0, PI_gl_start = 0, P_fix_t = NaN, P_y_fix_lm = 0, quarter = seq(21,24) )]
        h = list(table, tvc)
        stacked_h <- rbindlist(h, use.names=TRUE)
        setkey(stacked_h, quarter)

        return(stacked_h)
    } 
    else {
        setkey(dt, quarter)
        setkey(table, quarter)
        setkey(tvc, quarter)        
         for (i in 1:nrow(dt)) {
            dt[i, ':=' (P_t = ifelse(quarter == 1, 
            table[20, P_t]*(dt_61/dt_60),
            dt[i-1, P_t] *(dt[i, P_y_lm]/ dt[i-1, P_y_lm])), 
			P_gl_t = ifelse(quarter == 1, 
            table[20, P_gl_t]*exp(dt_gl_61-dt_gl_60),
            dt[i-1, P_gl_t] *(exp(dt[i, P_y_glm]-dt[i-1, P_y_glm]))),
			PI_start = dt_61, PI_end = dt_60, PI_gl_end = dt_gl_61, PI_gl_start = dt_gl_60,
			P_fix_t = ifelse(quarter == 1, table[20, P_fix_t]*(dt[i, P_y_fix_lm]/ table[20, P_y_fix_lm]), dt[i-1, P_fix_t]*(dt[i, P_y_fix_lm]/ dt[i-1, P_y_fix_lm])) )]
    }
    # dt[, c("P_y_lm","P_y_glm") := NULL]

    setkey(dt, quarter)
    setkey(tvc, quarter)
    comb <- tvc[dt, nomatch = 0]
    comb[, quarter := seq(21,24)]
    l = list(table, comb)
    stacked <- rbindlist(l, use.names=TRUE)
    setkey(stacked, quarter)
    return(stacked)
    }
}

PI_calc_12 <- function(dt, tvc, table, dt_72, dt_73, dt_gl_72, dt_gl_73) {
    if (nrow(dt) == 0) {
        setkey(tvc, quarter)
        setkey(table, quarter)          
        tvc[, `:=` (P_t = NaN, P_gl_t = NaN, P_y_lm = 0, P_y_glm = 0, PI_end = 0, PI_start = 0, PI_gl_end = 0, PI_gl_start = 0, P_fix_t = NaN, P_y_fix_lm = 0, quarter = seq(25,28) )]
        h = list(table, tvc)
        stacked_h <- rbindlist(h, use.names=TRUE)
        setkey(stacked_h, quarter)
        return(stacked_h)
    } 
    else {
        setkey(dt, quarter)
        setkey(table, quarter)
        setkey(tvc, quarter)        
         for (i in 1:nrow(dt)) {
            dt[i, ':=' (P_t = ifelse(quarter == 1, 
            table[24, P_t]*(dt_73/dt_72),
            dt[i-1, P_t] *(dt[i, P_y_lm]/ dt[i-1, P_y_lm])), 
			P_gl_t = ifelse(quarter == 1, 
            table[24, P_gl_t]*exp(dt_gl_73-dt_gl_72),
            dt[i-1, P_gl_t] *(exp(dt[i, P_y_glm]-dt[i-1, P_y_glm]))),
			PI_start = dt_73, PI_end = dt_72, PI_gl_end = dt_gl_73, PI_gl_start = dt_gl_72,
			P_fix_t = ifelse(quarter == 1, table[24, P_fix_t]*(dt[i, P_y_fix_lm]/ table[24, P_y_fix_lm]), dt[i-1, P_fix_t]*(dt[i, P_y_fix_lm]/ dt[i-1, P_y_fix_lm])) )]
    }
    # dt[, c("P_y_lm","P_y_glm") := NULL]

    setkey(dt, quarter)
    setkey(tvc, quarter)
    comb <- tvc[dt, nomatch = 0]
    comb[, quarter := seq(25,28)]
    l = list(table, comb)
    stacked <- rbindlist(l, use.names=TRUE)
    setkey(stacked, quarter)
    return(stacked)
    }
}

PI_calc_13 <- function(dt, tvc, table, dt_84, dt_85, dt_gl_84, dt_gl_85) {
    if (nrow(dt) == 0) {
        setkey(tvc, quarter)
        setkey(table, quarter)          
        tvc[, `:=` (P_t = NaN, P_gl_t = NaN, P_y_lm = 0, P_y_glm = 0, PI_end = 0, PI_start = 0, PI_gl_end = 0, PI_gl_start = 0, P_fix_t = NaN, P_y_fix_lm = 0, quarter = seq(29,32) )]
        h = list(table, tvc)
        stacked_h <- rbindlist(h, use.names=TRUE)
        setkey(stacked_h, quarter)
        return(stacked_h)
    } 
    else {
        setkey(dt, quarter)
        setkey(table, quarter)
        setkey(tvc, quarter)        
         for (i in 1:nrow(dt)) {
            dt[i, ':=' (P_t = ifelse(quarter == 1, 
            table[28, P_t]*(dt_85/dt_84),
            dt[i-1, P_t] *(dt[i, P_y_lm]/ dt[i-1, P_y_lm])), 
			P_gl_t = ifelse(quarter == 1, 
            table[28, P_gl_t]*exp(dt_gl_85-dt_gl_84),
            dt[i-1, P_gl_t] *(exp(dt[i, P_y_glm]-dt[i-1, P_y_glm]))),
			PI_start = dt_85, PI_end = dt_84, PI_gl_end = dt_gl_85, PI_gl_start = dt_gl_84,
			P_fix_t = ifelse(quarter == 1, table[28, P_fix_t]*(dt[i, P_y_fix_lm]/ table[28, P_y_fix_lm]), dt[i-1, P_fix_t]*(dt[i, P_y_fix_lm]/ dt[i-1, P_y_fix_lm])) )]
    }
    # dt[, c("P_y_lm","P_y_glm") := NULL]

    setkey(dt, quarter)
    setkey(tvc, quarter)
    comb <- tvc[dt, nomatch = 0]
    comb[, quarter := seq(29,32)]
    l = list(table, comb)
    stacked <- rbindlist(l, use.names=TRUE)
    setkey(stacked, quarter)
    return(stacked)
    }
}

PI_calc_14 <- function(dt, tvc, table, dt_96, dt_97, dt_gl_96, dt_gl_97) {
    if (nrow(dt) == 0) {
        setkey(tvc, quarter)
        setkey(table, quarter)          
        tvc[, `:=` (P_t = NaN, P_gl_t = NaN, P_y_lm = 0, P_y_glm = 0, PI_end = 0, PI_start = 0, PI_gl_end = 0, PI_gl_start = 0, P_fix_t = NaN, P_y_fix_lm = 0, quarter = seq(33,36) )]
        h = list(table, tvc)
        stacked_h <- rbindlist(h, use.names=TRUE)
        setkey(stacked_h, quarter)
        return(stacked_h)
    } 
    else {
        setkey(dt, quarter)
        setkey(table, quarter)
        setkey(tvc, quarter)        
         for (i in 1:nrow(dt)) {
            dt[i, ':=' (P_t = ifelse(quarter == 1, 
            table[32, P_t]*(dt_97/dt_96),
            dt[i-1, P_t] *(dt[i, P_y_lm]/ dt[i-1, P_y_lm])), 
			P_gl_t = ifelse(quarter == 1, 
            table[32, P_gl_t]*exp(dt_gl_97-dt_gl_96),
            dt[i-1, P_gl_t] *(exp(dt[i, P_y_glm]-dt[i-1, P_y_glm]))),
			PI_start = dt_97, PI_end = dt_96, PI_gl_end = dt_gl_97, PI_gl_start = dt_gl_96,
			P_fix_t = ifelse(quarter == 1, table[32, P_fix_t]*(dt[i, P_y_fix_lm]/ table[32, P_y_fix_lm]), dt[i-1, P_fix_t]*(dt[i, P_y_fix_lm]/ dt[i-1, P_y_fix_lm])) )]
    }
    # dt[, c("P_y_lm","P_y_glm") := NULL]

    setkey(dt, quarter)
    setkey(tvc, quarter)
    comb <- tvc[dt, nomatch = 0]
    comb[, quarter := seq(33,36)]
    l = list(table, comb)
    stacked <- rbindlist(l, use.names=TRUE)
    setkey(stacked, quarter)
    return(stacked)
    }
}

PI_calc_15 <- function(dt, tvc, table, dt_108, dt_109, dt_gl_108, dt_gl_109, group) {

    if (nrow(dt) == 0) {
        setkey(tvc, quarter)
        setkey(table, quarter)          
        tvc[, `:=` (P_t = NaN, P_gl_t = NaN, P_y_lm = 0, P_y_glm = 0, PI_end = 0, PI_start = 0, PI_gl_end = 0, PI_gl_start = 0, P_fix_t = NaN, P_y_fix_lm = 0, quarter = seq(37,40) )]
        h = list(table, tvc)
        stacked_h <- rbindlist(h, use.names=TRUE)
        setkey(stacked_h, quarter)
        stacked_h[,`:=`(TVC_t_1 = c(0, TVC_t[-.N]), P_t_1 = c(0, P_t[-.N]), P_gl_t_1 = c(0, P_gl_t[-.N]), P_fix_t_1 = c(0, P_fix_t[-.N]), 
                        Group = as.integer(group))]
        setcolorder(stacked_h, c("quarter", "Group", "VUL_t", "TVC_t", 
                                 "TVC_t_1", "P_t", "P_t_1", "P_gl_t","P_gl_t_1", "P_y_lm", "P_y_glm", "PI_start", "PI_end", "PI_gl_end", "PI_gl_start", "P_fix_t", "P_fix_t_1", "P_y_fix_lm"))
        setkey(stacked_h, quarter)
        return(stacked_h)
    } 
    else {
        setkey(dt, quarter)
        setkey(table, quarter)
        setkey(tvc, quarter)        
        for (i in 1:nrow(dt)) {
            dt[i, ':=' (P_t = ifelse(quarter == 1, 
            table[36, P_t]*(dt_109/dt_108),
            dt[i-1, P_t] *(dt[i, P_y_lm]/ dt[i-1, P_y_lm])), 
			P_gl_t = ifelse(quarter == 1, 
            table[36, P_gl_t]*exp(dt_gl_109-dt_gl_108),
            dt[i-1, P_gl_t] *(exp(dt[i, P_y_glm]-dt[i-1, P_y_glm]))),
			PI_start = dt_109, PI_end = dt_108, PI_gl_end = dt_gl_109, PI_gl_start = dt_gl_108,
			P_fix_t = ifelse(quarter == 1, table[36, P_fix_t]*(dt[i, P_y_fix_lm]/ table[36, P_y_fix_lm]), dt[i-1, P_fix_t]*(dt[i, P_y_fix_lm]/ dt[i-1, P_y_fix_lm])) )]
        }
    # dt[, c("P_y_lm","P_y_glm") := NULL]

    setkey(dt, quarter)
    setkey(tvc, quarter)
    comb <- tvc[dt, nomatch = 0]
    comb[, quarter := seq(37,40)]
    l = list(table, comb)
    stacked <- rbindlist(l, use.names=TRUE)
    setkey(stacked, quarter)
        stacked[,`:=`(TVC_t_1 = c(0, TVC_t[-.N]), P_t_1 = c(0, P_t[-.N]), P_gl_t_1 = c(0, P_gl_t[-.N]), P_fix_t_1 = c(0, P_fix_t[-.N]),
                        Group = as.integer(group))]
    setcolorder(stacked, c("quarter", "Group", "VUL_t", "TVC_t", 
                                 "TVC_t_1", "P_t", "P_t_1", "P_gl_t","P_gl_t_1", "P_y_lm", "P_y_glm", "PI_start", "PI_end", "PI_gl_end", "PI_gl_start", "P_fix_t", "P_fix_t_1", "P_y_fix_lm"))
    setkey(stacked, quarter)

    return(stacked)
    }
}

# Merge sales data with temp data, convert NA to zero 
replace.missing <- function(tvc, temp) {
    tvc_na <- tvc[temp, nomatch = NA]
    na.with.zero(tvc_na)
    return(tvc_na)
}

# Convert missing values to zero for input data 
na.with.zero <-  function(DT) {

    for (j in seq_len(ncol(DT)))
        set(DT,which(is.na(DT[[j]])),j,0)

}


# Return same data table with only locations that are not missing in any year 

geo.level.check <- function(dt, type) {

    switch(type, 
        by_county = {

        county_all <- list(unique(dt[year == 2006, fips_county_code]),
                        unique(dt[year == 2007, fips_county_code]),
                        unique(dt[year == 2008, fips_county_code]),
                        unique(dt[year == 2009, fips_county_code]),
                        unique(dt[year == 2010, fips_county_code]),
                        unique(dt[year == 2011, fips_county_code]),
                        unique(dt[year == 2012, fips_county_code]),
                        unique(dt[year == 2013, fips_county_code]), 
                        unique(dt[year == 2014, fips_county_code]), 						
                        unique(dt[year == 2015, fips_county_code]))
        county_all <- Reduce(intersect, county_all)
        dt_county <- dt[fips_county_code %in% county_all,] 
        return(dt_county)

        },

        by_MSA = {

        MSA_all <- list(unique(dt[year == 2006, MSA]),
                    unique(dt[year == 2007, MSA]),
                    unique(dt[year == 2008, MSA]),
                    unique(dt[year == 2009, MSA]),
                    unique(dt[year == 2010, MSA]),
                    unique(dt[year == 2011, MSA]),
                    unique(dt[year == 2012, MSA]),  
                    unique(dt[year == 2013, MSA]),
                    unique(dt[year == 2014, MSA]),
                    unique(dt[year == 2015, MSA]))					
        MSA_all <- Reduce(intersect, MSA_all)
        dt_MSA <- dt[MSA %in% MSA_all,]
        return(dt_MSA)
        
        },
        
        by_store = {
            
            store_all <- list(unique(dt[year == 2006, store_code_uc]),
                            unique(dt[year == 2007, store_code_uc]),
                            unique(dt[year == 2008, store_code_uc]),
                            unique(dt[year == 2009, store_code_uc]),
                            unique(dt[year == 2010, store_code_uc]),
                            unique(dt[year == 2011, store_code_uc]),
                            unique(dt[year == 2012, store_code_uc]),  
                            unique(dt[year == 2013, store_code_uc]),
                            unique(dt[year == 2014, store_code_uc]),
                            unique(dt[year == 2015, store_code_uc]))							
            store_all <- Reduce(intersect, store_all)
            dt_store <- dt[store_code_uc %in% store_all,]
            return(dt_store)

        })

}
	
fast.subset <-function(dt, type)  {

    l = {}

    ## let's use binary search as this is way faster approach. 

    switch(type, 
        by_county = {
    setkey(dt, fips_county_code, year)
	
    counties <- sort(unique(dt$fips_county_code))
	
	i = 0 
    for (county in counties) {
		i = i+1 
	
        l$Y2006[[i]] <- dt[.(county,2006)]
        l$Y2007[[i]] <- dt[.(county,2007)]
        l$Y2008[[i]] <- dt[.(county,2008)]
        l$Y2009[[i]] <- dt[.(county,2009)]
        l$Y2010[[i]] <- dt[.(county,2010)]
        l$Y2011[[i]] <- dt[.(county,2011)]
        l$Y2012[[i]] <- dt[.(county,2012)]
        l$Y2013[[i]] <- dt[.(county,2013)]
        l$Y2014[[i]] <- dt[.(county,2014)]
        l$Y2015[[i]] <- dt[.(county,2015)]		
    }

    l$Y2006 <- Filter(Negate(is.null), l$Y2006)
    l$Y2007 <- Filter(Negate(is.null), l$Y2007)
    l$Y2008 <- Filter(Negate(is.null), l$Y2008)
    l$Y2009 <- Filter(Negate(is.null), l$Y2009)
    l$Y2010 <- Filter(Negate(is.null), l$Y2010)
    l$Y2011 <- Filter(Negate(is.null), l$Y2011)
    l$Y2012 <- Filter(Negate(is.null), l$Y2012)
    l$Y2013 <- Filter(Negate(is.null), l$Y2013)
    l$Y2014 <- Filter(Negate(is.null), l$Y2014)
    l$Y2015 <- Filter(Negate(is.null), l$Y2015)
	
    names(l$Y2006) <- counties
    names(l$Y2007) <- counties
    names(l$Y2008) <- counties
    names(l$Y2009) <- counties
    names(l$Y2010) <- counties
    names(l$Y2011) <- counties
    names(l$Y2012) <- counties
    names(l$Y2013) <- counties
    names(l$Y2014) <- counties
    names(l$Y2015) <- counties

    return(l)

    },

    by_MSA = {
    setkey(dt, MSA, year)

    MSA <- sort(unique(dt$MSA))
    

    for (msa in MSA) {

        l$Y2006[[msa]] <- dt[.(msa,2006)]
        l$Y2007[[msa]] <- dt[.(msa,2007)]
        l$Y2008[[msa]] <- dt[.(msa,2008)]
        l$Y2009[[msa]] <- dt[.(msa,2009)]
        l$Y2010[[msa]] <- dt[.(msa,2010)]
        l$Y2011[[msa]] <- dt[.(msa,2011)]
        l$Y2012[[msa]] <- dt[.(msa,2012)]
        l$Y2013[[msa]] <- dt[.(msa,2013)]
        l$Y2014[[msa]] <- dt[.(msa,2014)]
        l$Y2015[[msa]] <- dt[.(msa,2015)]		
    }

    l$Y2006 <- Filter(Negate(is.null), l$Y2006)
    l$Y2007 <- Filter(Negate(is.null), l$Y2007)
    l$Y2008 <- Filter(Negate(is.null), l$Y2008)
    l$Y2009 <- Filter(Negate(is.null), l$Y2009)
    l$Y2010 <- Filter(Negate(is.null), l$Y2010)
    l$Y2011 <- Filter(Negate(is.null), l$Y2011)
    l$Y2012 <- Filter(Negate(is.null), l$Y2012)
    l$Y2013 <- Filter(Negate(is.null), l$Y2013)
    l$Y2014 <- Filter(Negate(is.null), l$Y2014)
    l$Y2015 <- Filter(Negate(is.null), l$Y2015)
	
    names(l$Y2006) <- MSA
    names(l$Y2007) <- MSA
    names(l$Y2008) <- MSA
    names(l$Y2009) <- MSA
    names(l$Y2010) <- MSA
    names(l$Y2011) <- MSA
    names(l$Y2012) <- MSA
    names(l$Y2013) <- MSA
    names(l$Y2014) <- MSA
    names(l$Y2015) <- MSA

    return(l)
    
    },
    # BY STORE, ADDED 
    by_store = {
		# Setting key is a fixed cost that increases speed of all future computations (variable costs)
        setkey(dt, store_code_uc, year)
        
        stores <- sort(unique(dt$store_code_uc))
        
		i=0
        for (store in stores) {
            i=i+1 
			
            l$Y2006[[i]] <- dt[.(store,2006)]
            l$Y2007[[i]] <- dt[.(store,2007)]
            l$Y2008[[i]] <- dt[.(store,2008)]
            l$Y2009[[i]] <- dt[.(store,2009)]
            l$Y2010[[i]] <- dt[.(store,2010)]
            l$Y2011[[i]] <- dt[.(store,2011)]
            l$Y2012[[i]] <- dt[.(store,2012)]
            l$Y2013[[i]] <- dt[.(store,2013)]
            l$Y2014[[i]] <- dt[.(store,2014)]
            l$Y2015[[i]] <- dt[.(store,2015)]			
        }
        
        l$Y2006 <- Filter(Negate(is.null), l$Y2006)
        l$Y2007 <- Filter(Negate(is.null), l$Y2007)
        l$Y2008 <- Filter(Negate(is.null), l$Y2008)
        l$Y2009 <- Filter(Negate(is.null), l$Y2009)
        l$Y2010 <- Filter(Negate(is.null), l$Y2010)
        l$Y2011 <- Filter(Negate(is.null), l$Y2011)
        l$Y2012 <- Filter(Negate(is.null), l$Y2012)
        l$Y2013 <- Filter(Negate(is.null), l$Y2013)
        l$Y2014 <- Filter(Negate(is.null), l$Y2014)
        l$Y2015 <- Filter(Negate(is.null), l$Y2015) 
		
        names(l$Y2006) <- stores
        names(l$Y2007) <- stores
        names(l$Y2008) <- stores
        names(l$Y2009) <- stores
        names(l$Y2010) <- stores
        names(l$Y2011) <- stores
        names(l$Y2012) <- stores
        names(l$Y2013) <- stores
        names(l$Y2014) <- stores
        names(l$Y2015) <- stores        
        
        return(l)  

    })      


}


# Input list of units (e.g. stores, counties, states) with their prices, output first stage price index (unit x product group/module) for all years 
parallel.county  <- function(l, group, core) {

    cores <- core
    prev.time <- verbose.Print.Time(T, "parallel.county")

    out67 <- mcMap(Year_06_07, l$Y2006, l$Y2007, 
                    mc.set.seed = TRUE, mc.cores = cores)
    prev.time <- verbose.Print.Time(T, "out67", prev.time)
    
    out08 <- mcMap(Year_general, l$Y2007, l$Y2008, 
                2008, mc.set.seed = TRUE, mc.cores = cores)
    prev.time <- verbose.Print.Time(T, "out08", prev.time)
    
    out09 <- mcMap(Year_general, l$Y2008, l$Y2009, 
                2009, mc.set.seed = TRUE, mc.cores = cores)
    prev.time <- verbose.Print.Time(T, "out09", prev.time)
    
    out10 <- mcMap(Year_general, l$Y2009, l$Y2010, 
                2010, mc.set.seed = TRUE, mc.cores = cores)
    prev.time <- verbose.Print.Time(T, "out10", prev.time)
    
    out11 <- mcMap(Year_general, l$Y2010, l$Y2011, 
                2011,mc.set.seed = TRUE, mc.cores = cores)
    prev.time <- verbose.Print.Time(T, "out11", prev.time)
    
    out12 <- mcMap(Year_general, l$Y2011, l$Y2012, 
                2012,mc.set.seed = TRUE, mc.cores = cores)
    prev.time <- verbose.Print.Time(T, "out12", prev.time)
    
    out13 <- mcMap(Year_general, l$Y2012, l$Y2013, 
                2013,mc.set.seed = TRUE, mc.cores = cores)
    prev.time <- verbose.Print.Time(T, "out13", prev.time)
    
	out14 <- mcMap(Year_general, l$Y2013, l$Y2014, 
                2014,mc.set.seed = TRUE, mc.cores = cores)
    prev.time <- verbose.Print.Time(T, "out14", prev.time)

    out15 <- mcMap(Year_general, l$Y2014, l$Y2015, 
                2015,mc.set.seed = TRUE, mc.cores = cores)
    prev.time <- verbose.Print.Time(T, "out15", prev.time)    
	
    #rm(dt_county, pos = ".GlobalEnv")
    rm(l)
	
    l_PI_67 <- lapply(out67, '[[', "PI")
    l_TVC_67 <- lapply(out67, '[[', "VUL_TVC")

    rm(out67)

    l_PI_8 <- lapply(out08, '[[', "PI")
    l_TVC_8 <- lapply(out08, '[[', "VUL_TVC")
	end_year_07 <- lapply(out08, '[[', "PI_end")
    start_year_08 <- lapply(out08, '[[', "PI_start")
	end_year_gl_07 <- lapply(out08, '[[', "PI_gl_end")
    start_year_gl_08 <- lapply(out08, '[[', "PI_gl_start")		
    rm(out08)

    l_PI_9 <- lapply(out09, '[[', "PI")
    l_TVC_9 <- lapply(out09, '[[', "VUL_TVC")
	end_year_08 <- lapply(out09, '[[', "PI_end")
    start_year_09 <- lapply(out09, '[[', "PI_start")
	end_year_gl_08 <- lapply(out09, '[[', "PI_gl_end")
    start_year_gl_09 <- lapply(out09, '[[', "PI_gl_start")	
    rm(out09)

    l_PI_10 <- lapply(out10, '[[', "PI")
    l_TVC_10 <- lapply(out10, '[[', "VUL_TVC")
	end_year_09 <- lapply(out10, '[[', "PI_end")
    start_year_10 <- lapply(out10, '[[', "PI_start")
	end_year_gl_09 <- lapply(out10, '[[', "PI_gl_end")
    start_year_gl_10 <- lapply(out10, '[[', "PI_gl_start")	
    rm(out10)

    l_PI_11 <- lapply(out11, '[[', "PI")
    l_TVC_11 <- lapply(out11, '[[', "VUL_TVC")
	end_year_10 <- lapply(out11, '[[', "PI_end")
    start_year_11 <- lapply(out11, '[[', "PI_start")
	end_year_gl_10 <- lapply(out11, '[[', "PI_gl_end")
    start_year_gl_11 <- lapply(out11, '[[', "PI_gl_start")	
    rm(out11)

    l_PI_12 <- lapply(out12, '[[', "PI")
    l_TVC_12 <- lapply(out12, '[[', "VUL_TVC")
	end_year_11 <- lapply(out12, '[[', "PI_end")
    start_year_12 <- lapply(out12, '[[', "PI_start")
	end_year_gl_11 <- lapply(out12, '[[', "PI_gl_end")
    start_year_gl_12 <- lapply(out12, '[[', "PI_gl_start")		
    rm(out12)

    l_PI_13 <- lapply(out13, '[[', "PI")
    l_TVC_13 <- lapply(out13, '[[', "VUL_TVC")
	end_year_12 <- lapply(out13, '[[', "PI_end")
    start_year_13 <- lapply(out13, '[[', "PI_start")
	end_year_gl_12 <- lapply(out13, '[[', "PI_gl_end")
    start_year_gl_13 <- lapply(out13, '[[', "PI_gl_start")	
    rm(out13)

    l_PI_14 <- lapply(out14, '[[', "PI")
    l_TVC_14 <- lapply(out14, '[[', "VUL_TVC")
	end_year_13 <- lapply(out14, '[[', "PI_end")
    start_year_14 <- lapply(out14, '[[', "PI_start")
	end_year_gl_13 <- lapply(out14, '[[', "PI_gl_end")
    start_year_gl_14 <- lapply(out14, '[[', "PI_gl_start")	
    rm(out14)

    l_PI_15 <- lapply(out15, '[[', "PI")
    l_TVC_15 <- lapply(out15, '[[', "VUL_TVC")
	end_year_14 <- lapply(out15, '[[', "PI_end")
    start_year_15 <- lapply(out15, '[[', "PI_start")
	end_year_gl_14 <- lapply(out15, '[[', "PI_gl_end")
    start_year_gl_15 <- lapply(out15, '[[', "PI_gl_start")	
    rm(out15)
	
    PI_0607 <- mcMap(PI_calc_06_07, l_PI_67, l_TVC_67, 
                      mc.set.seed = TRUE, mc.cores = cores)

    rm(l_PI_67, l_TVC_67)

    PI_08 <- mcMap(PI_calc_08, l_PI_8, l_TVC_8, PI_0607, end_year_07, start_year_08, end_year_gl_07, start_year_gl_08,
                      mc.set.seed = TRUE, mc.cores = cores)
    rm(l_PI_8, l_TVC_8, PI_0607)

    PI_09 <- mcMap(PI_calc_09, l_PI_9, l_TVC_9, PI_08, end_year_08, start_year_09, end_year_gl_08, start_year_gl_09,
                      mc.set.seed = TRUE, mc.cores = cores)
    rm(l_PI_9, l_TVC_9, PI_08)

    PI_10 <- mcMap(PI_calc_10, l_PI_10, l_TVC_10, PI_09, end_year_09, start_year_10, end_year_gl_09, start_year_gl_10,
                      mc.set.seed = TRUE, mc.cores = cores)
    rm(l_PI_10, l_TVC_10, PI_09)

    PI_11 <- mcMap(PI_calc_11, l_PI_11, l_TVC_11, PI_10, end_year_10, start_year_11, end_year_gl_10, start_year_gl_11,
                      mc.set.seed = TRUE, mc.cores = cores)
    rm(l_PI_11, l_TVC_11, PI_10)

    PI_12 <- mcMap(PI_calc_12, l_PI_12, l_TVC_12, PI_11, end_year_11, start_year_12, end_year_gl_11, start_year_gl_12,
                      mc.set.seed = TRUE, mc.cores = cores)
    rm(l_PI_12, l_TVC_12, PI_11)

    PI_13 <- mcMap(PI_calc_13, l_PI_13, l_TVC_13, PI_12, end_year_12, start_year_13, end_year_gl_12, start_year_gl_13,
                   mc.set.seed = TRUE, mc.cores = cores)

    rm(l_PI_13, l_TVC_13, PI_12)

	PI_14 <- mcMap(PI_calc_14, l_PI_14, l_TVC_14, PI_13, end_year_13, start_year_14, end_year_gl_13, start_year_gl_14,
                      mc.set.seed = TRUE, mc.cores = cores)
    rm(l_PI_14, l_TVC_14, PI_13)
	
    PI_15 <- mcMap(PI_calc_15, l_PI_15, l_TVC_15, PI_14, end_year_14, start_year_15, end_year_gl_14, start_year_gl_15,
                  group, mc.set.seed = TRUE, mc.cores = cores)
    rm(l_PI_15, l_TVC_15, PI_14)
	
    return(PI_15)


}

Fix_weight <- function(dt_06) {
        
        setkey(dt_06,store_code_uc, upc,upc_ver_uc)
		
		# For each group as defined by key, generate unique number 
        dt_06[, groupkey :=.GRP, by = key(dt_06)]
		# Generate total number of observations within each key 
        dt_06[, num_quarter_06 := .N, by = (groupkey)]    
		
		# Keep only store x upc's that show up all quarters in the base period
        out_2006_full <- dt_06[num_quarter_06 == 4,]

        out_2006_full[, c("fips_state_code", "fips_county_code") := NULL]
		
		# Sample selection: Pick out relevant store upc's
        unique_2006 <- unique(out_2006_full)
        unique_2006[, c("quarter",  "Units", "Volume","Price",
                "unit_price",  "groupkey","num_quarter_07") := NULL]
        
        # Merge 2006 with 2007, where 2007 are the store x upc's that have 4 quarters of +ve sales
        out_2006_merge <- out_2006_full[unique_2006, nomatch = 0]
        rm(unique_2006)
        
		# Sum of quantities and prices by store x upc (groupkey and num_quarter_06 are added just to keep those variables)
        out_2006_merge <- out_2006_merge[, TotalVolume := sum(Volume)]									   
		Sum_by_2006 <- out_2006_merge[, list(SumUnits = sum(Units), SumVolume = sum(Volume)),
                                     by = .(store_code_uc, upc,upc_ver_uc,
                                       groupkey, num_quarter_06, TotalVolume)]
					   
		# Average quantity for each store x upc within year
        Q_bar_2006 <- Sum_by_2006[, list(q_fix = SumUnits/4, s_fix = SumVolume/TotalVolume),
                                 by = .(store_code_uc,upc_ver_uc, upc)]
								 
        rm(Sum_by_2006)
        setkey(Q_bar_2006, store_code_uc,upc_ver_uc, upc)
		
		return(Q_bar_2006)
}

# Input yearly segments of list of stores and their raw data, 2006-2007, output raw numbers needed to compute price index as well as total sales 

Year_06_07 <- function(dt_06, dt_07) {
        template <- data.table(quarter=seq(1,4))
        setkey(template, quarter)

        setkey(dt_06,store_code_uc, upc, upc_ver_uc)
		
		# For each group as defined by key, generate unique number 
        dt_06[, groupkey :=.GRP, by = key(dt_06)]
		# Generate total number of observations within each key 
        dt_06[, num_quarter_06 := .N, by = (groupkey)]    

        setkey(dt_07,store_code_uc, upc, upc_ver_uc)

        dt_07[, groupkey :=.GRP, by = key(dt_07)]
        dt_07[, num_quarter_07 := .N, by = (groupkey)]
		
		# Keep only store x upc's that show up over the entire period in both 2006 and 2007 
        out_2006_full <- dt_06[num_quarter_06 == 4,]
        out_2007_full <- dt_07[num_quarter_07 == 4,]

        out_2006_full[, c("fips_state_code", "fips_county_code") := NULL]
        out_2007_full[, c("fips_state_code", "fips_county_code") :=  NULL]

		# Generate sales by quarter (given each input is a store, this is the total sales per store)
        Total_Value_06 <- dt_06[, .(TVC_t = sum(Volume)), by = quarter]
        Total_Value_06[, c("fips_state_code", "fips_county_code") := NULL]
        Total_Value_06 <- Total_Value_06[order(quarter)]
        setkey(Total_Value_06, quarter)

        Total_Value_07 <- dt_07[, .(TVC_t = sum(Volume)), by = quarter]
        Total_Value_07[, c("fips_state_code", "fips_county_code"):= NULL]
        Total_Value_07 <- Total_Value_07[order(quarter)]
        setkey(Total_Value_07, quarter)
		
		# Keep only unique store x upc observations, used for merge into 2006 dataset (we are merging in those store x upc's that are consistent in 2007, this implies the sample restriction for 2006 are balanced products in both 2006 and 2007)
        unique_2007 <- unique(out_2007_full)
        unique_2007[, c("quarter",  "Units", "Volume","Price",
                "unit_price",  "groupkey","num_quarter_07") := NULL]
        
        # Merge 2006 with 2007, where 2007 are the store x upc's that have 4 quarters of +ve sales
        out_2006_merge <- out_2006_full[unique_2007, nomatch = 0]
        rm(unique_2007)
        
		# Sum of quantities and prices by store x upc (groupkey and num_quarter_06 are added just to keep those variables)
        out_2006_merge <- out_2006_merge[, TotalVolume := sum(Volume)]									   
		Sum_by_2006 <- out_2006_merge[, list(SumUnits = sum(Units), SumVolume = sum(Volume)),
                                     by = .(store_code_uc, upc, upc_ver_uc, 
                                       groupkey, num_quarter_06, TotalVolume)]
					   
		# Average quantity for each store x upc within year
        Q_bar_2006 <- Sum_by_2006[, list(q_06 = SumUnits/4, s_06 = SumVolume/TotalVolume),
                                 by = .(store_code_uc, upc, upc_ver_uc)]
								 
        rm(Sum_by_2006)
        setkey(Q_bar_2006, store_code_uc, upc, upc_ver_uc)

        out_2006_merge[, c("Units", "groupkey", "num_quarter_06"):= NULL]
		
		# Merge 2006 raw data with averages just constructed 
        PPT_2006_FULL <- out_2006_merge[Q_bar_2006, nomatch = 0]
			
		# Merge with fixed weights
		setkey(PPT_2006_FULL,store_code_uc,upc, upc_ver_uc)		
		if( nrow(PPT_2006_FULL) > 0 ) {
			
			# Full Outer Join, then replace NA fix weights by zero, and remove observations from fixed weight only, end point collection required since sample could be shrinking
			
			PPT_2006_FULL <- merge(PPT_2006_FULL, w_ft, all = T)
	
			# Remove observations available in fix_weight but not in FULL
			PPT_2006_FULL <- PPT_2006_FULL[is.na(quarter)==F]
			
			# Replace missing weights with zero
			PPT_2006_FULL[,':=' (q_fix = ifelse(is.na(q_fix),0,q_fix), s_fix = ifelse(is.na(s_fix),0,s_fix) )]
						
		} else {
			PPT_2006_FULL[,':=' (q_fix = numeric(), s_fix = numeric() )]			
		}	

		# Generate sales only for those store x upc that passed the selection criteria 
        VUL_1_12 <- PPT_2006_FULL[, list(VUL_t = sum(Volume)), by = quarter]
        setkey(VUL_1_12, quarter)
		
		# Create numerator and denominators as in last term for first stage for each observation 
        PPT_2006_FULL[, `:=` (numer = q_06 * Price, gl_numer = s_06 * log(Price), fix_numer = q_fix * Price,
                      Volume = NULL, Price = NULL, s_06 = NULL, q_06 = NULL)]
		
		# Sum by quarter over all the products (given each input is at the store x product group/module, sums over all j within each store according to equation)
        P_y_1_12 <- PPT_2006_FULL[, .(numersum = sum(numer), gl_numersum = sum(gl_numer), fix_numersum = sum(fix_numer)), by = quarter]

        rm(PPT_2006_FULL)
		
		# Divide numerator by denominator, obtained last term for each unit x product group 
        P_y_1_12[, `:=` (P_y_lm = numersum, P_y_glm = gl_numersum, P_y_fix_lm = fix_numersum, numersum = NULL, gl_numersum = NULL, fix_numersum = NULL)]

        ##### 07 Year ######
        out_2007_full[, c("Units", "groupkey", "num_quarter_07"):= NULL]
        PPT_2007_FULL <- out_2007_full[Q_bar_2006, nomatch = 0]
        rm(Q_bar_2006)
		
		# Merge with fixed weights
		setkey(PPT_2007_FULL,store_code_uc,upc, upc_ver_uc)		
		if( nrow(PPT_2007_FULL) > 0 ) {
			
			# Full Outer Join, then replace NA fix weights by zero, and remove observations from fixed weight only, end point collection required since sample could be shrinking
			
			PPT_2007_FULL <- merge(PPT_2007_FULL, w_ft, all = T)
			
			# PPT_2007_FULL <- merge(PPT_2007_FULL,fix_weight[[as.character(unique(PPT_2007_FULL[,store_code_uc]))]], all=T)
			
			# Remove observations available in fix_weight but not in FULL
			PPT_2007_FULL <- PPT_2007_FULL[is.na(quarter)==F]
			
			# Replace missing weights with zero
			PPT_2007_FULL[,':=' (q_fix = ifelse(is.na(q_fix),0,q_fix), s_fix = ifelse(is.na(s_fix),0,s_fix) )]
						
		} else {
			PPT_2007_FULL[,':=' (q_fix = numeric(), s_fix = numeric() )]			
		}			
		
        VUL_13_24 <- PPT_2007_FULL[, list(VUL_t = sum(Volume)), by = quarter]
        setkey(VUL_13_24, quarter)
		
		# Use last year's mean quantities as weights, denominator is constructed using average values, gets cancelled out except at end points, for which
		# we use a different value, so it's more like a scaling constant than anything 
        PPT_2007_FULL[, `:=` (numer = q_06 * Price, gl_numer = s_06 * log(Price), fix_numer = q_fix * Price,  
                      Volume = NULL, Price = NULL, s_06 = NULL, q_06 = NULL)]

        P_y_13_24 <- PPT_2007_FULL[, .(numersum = sum(numer), gl_numersum = sum(gl_numer), fix_numersum = sum(fix_numer)), by = quarter]

        rm(PPT_2007_FULL)
        P_y_13_24[, `:=` (P_y_lm = numersum, P_y_glm = gl_numersum, P_y_fix_lm = fix_numersum, numersum = NULL, gl_numersum = NULL, fix_numersum = NULL)]
		
		# Bind the 2 key outputs for prices 
        Pi_list <- list(P_y_1_12, P_y_13_24)
        PI_0607<- rbindlist(Pi_list, use.names = TRUE)
        PI_0607[, `:=`(quarter = .I, P_t = 1.0, P_gl_t = 1.0, P_fix_t = 1.0)]
        setkey(PI_0607, quarter)
        rm(Pi_list, P_y_1_12, P_y_13_24)

		
        if (nrow(VUL_1_12) != 4) {
            VUL_1_12 <- replace.missing(VUL_1_12, template)
        }

        if (nrow(VUL_13_24) != 4) {
            VUL_13_24 <- replace.missing(VUL_13_24, template)
        }

		# Bind the 2 key outputs for VUL 
        Vul_list <- list(VUL_1_12,VUL_13_24)
        VUL <- rbindlist(Vul_list, use.names = TRUE)
        VUL[, quarter := .I]
        setkey(VUL, quarter)
        rm(Vul_list, VUL_1_12, VUL_13_24)

        if (nrow(Total_Value_06) != 4) {
            Total_Value_06 <- replace.missing(Total_Value_06, template)
        }

        if (nrow(Total_Value_07) != 4) {
            Total_Value_07 <- replace.missing(Total_Value_07, template)
        }

		# Bind the 2 key outputs for sales 
        TV_list <- list(Total_Value_06, Total_Value_07)
        TVC <- rbindlist(TV_list, use.names = TRUE)
        TVC[, quarter := .I]
        setkey(TVC, quarter)

        VUL_TVC <- VUL[TVC, nomatch = 0]

        rm(TV_list, Total_Value_06, Total_Value_07, TVC, VUL)
		
		# 3 outputs: 1. Last terms of equations for 2006-2007 2. Sales for selected goods and all goods 3. Value of last quarter, used for index construction 
        output <- list("PI" = PI_0607, "VUL_TVC" = VUL_TVC)

        return(output)

}

# Input yearly segments of list of stores and their raw data, years outside of 2006-2007, output raw numbers needed to compute price index 
	# Similar comments in Year_0607, only differences here are commented on 
Year_general <- function(dt_t_1, dt_t, year) {
        template <- data.table(quarter=seq(1,4))
        setkey(template, quarter)

        #dt_t[, c("fips_state_code", "fips_county_code") := NULL]
        setkey(dt_t,store_code_uc, upc, upc_ver_uc)
        setkey(dt_t_1,store_code_uc, upc, upc_ver_uc)

        dt_t_1[, groupkey :=.GRP, by = key(dt_t_1)]
        dt_t_1[, num_quarter := .N, by = (groupkey)] 		
		
        dt_t[, groupkey :=.GRP, by = key(dt_t)]
        dt_t[, num_quarter := .N, by = (groupkey)]    

        Total_Value <- dt_t[, .(TVC_t = sum(Volume)), by = quarter]
        Total_Value <- Total_Value[order(quarter)]
        Total_Value[, c("fips_state_code", "fips_county_code"):= NULL]

        setkey(Total_Value, quarter)

        if (nrow(Total_Value) != 4) {
            Total_Value <- replace.missing(Total_Value, template)
            setkey(Total_Value, quarter)
        }
		# Pick out store-upc's that appeared in every quarter in t 
        out_full <- dt_t[num_quarter == 4,]  
        out_full[, c("fips_state_code", "fips_county_code"):= NULL]
        unique_set <- unique(out_full)
        unique_set[, c("quarter",  "Units", "Volume","Price",
                       "unit_price","groupkey","num_quarter") := NULL]
	   
		# Raw data from t_1, only for store-upc's that appeared in every quarter in t 
        out_merge <- unique_set[dt_t_1, nomatch = 0]
        out_merge[, c("fips_state_code", "fips_county_code"):= NULL]
       
        # rm(unique_set)

        out_merge[, groupkey :=.GRP, by = key(out_merge)]
        out_merge[, num_quarter := .N, by = (groupkey)]
		# For the selected store-upc's in t_1, sum over key 
		out_merge <- out_merge[, TotalVolume := sum(Volume)]	
        Sum_by <- out_merge[, list(SumUnits = sum(Units),
                                      SumVolume = sum(Volume)),
                                      by = .(store_code_uc, upc, upc_ver_uc, 
                                           groupkey, num_quarter, TotalVolume)]
		# Average quantities from t_1 to be used as weights 
        Q_bar <- Sum_by[, list(q_bar = SumUnits/4, s_bar = SumVolume/TotalVolume),
                                 by = .(store_code_uc, upc, upc_ver_uc)]
        rm(Sum_by, out_merge)                                
		
        setkey(Q_bar, store_code_uc, upc, upc_ver_uc)
        out_full[, c("Units", "groupkey", "num_quarter"):= NULL]
        # Merge in weights from t_1 into raw data from t that was pre-selected for eligible store-upc's only 
		PPT_FULL <- out_full[Q_bar, nomatch = 0]

		# Merge with fixed weights
		setkey(PPT_FULL,store_code_uc,upc, upc_ver_uc)		
		if( nrow(PPT_FULL) > 0 ) {
			
			# Full Outer Join, then replace NA fix weights by zero, and remove observations from fixed weight only, end point collection required since sample could be shrinking
			
			PPT_FULL <- merge(PPT_FULL, w_ft, all = T)
			
			# PPT_FULL <- merge(PPT_FULL,fix_weight[[as.character(unique(PPT_FULL[,store_code_uc]))]], all=T)
			
			# Remove observations available in fix_weight but not in FULL
			PPT_FULL <- PPT_FULL[is.na(quarter)==F]
			
			# Replace missing weights with zero
			PPT_FULL[,':=' (q_fix = ifelse(is.na(q_fix),0,q_fix), s_fix = ifelse(is.na(s_fix),0,s_fix) )]
						
		} else {
			PPT_FULL[,':=' (q_fix = numeric(), s_fix = numeric() )]			
		}		

        rm(Q_bar, out_full)
        VUL <- PPT_FULL[, list(VUL_t = sum(Volume)), by = quarter][order(quarter)]
        setkey(VUL, quarter)

        if (nrow(VUL)!= 4) {
            VUL <- replace.missing(VUL, template)
            setkey(VUL,quarter)
        }

        VUL_TVC <- VUL[Total_Value, nomatch = 0]

        rm(VUL, Total_Value)
        
		# These are lagged values as we want them to be because they were merged in using d_t_1 into d_t
        PPT_FULL[, `:=` (numer = q_bar * Price, gl_numer = s_bar * log(Price), fix_numer = q_fix * Price,  
                      Volume = NULL, Price = NULL, s_bar = NULL, q_bar = NULL)]     

        P_y <- PPT_FULL[, .(numersum = sum(numer), gl_numersum = sum(gl_numer), fix_numersum = sum(fix_numer)),
                by = quarter]

        rm(PPT_FULL)

        P_y[, `:=` (P_y_lm = numersum, P_y_glm = gl_numersum, P_y_fix_lm = fix_numersum, numersum = NULL, gl_numersum = NULL, fix_numersum = NULL)]

       ### end quarter ###
	   # Alternative: 1. For the year end year start link, use only products that appear for 4 quarters in t and in q4 of t_1 (since need the price)
	   # 2. For other links, use products that appear for 4 quarters in t 

            end_dec <- dt_t_1[quarter == 4,]
            setkey(end_dec, store_code_uc,upc, upc_ver_uc)
            end_dec[, c("quarter", "Units", "Volume",
                        "unit_price","groupkey","Price",
                    "fips_state_code", "fips_county_code") := NULL]
            end_dec <- unique(end_dec)
			# Selection criteria 1: Pick only store-upc's that showed up in q4 to construct year-end value, match with t_1 raw data 
            end_quarter <- end_dec[dt_t_1, nomatch = 0]
            end_quarter[, c("fips_state_code", "fips_county_code"):= NULL]

            # rm(end_dec)
			
			# Selection criteria 2: Further select the set of UPC's that appeared 4 quarters in t 			
			end_quarter <- end_quarter[unique_set, nomatch=0]
			
			
			# For the subset of store-upc's chosen, construct weights 
			end_quarter <- end_quarter[, TotalVolume := sum(Volume)]
            Sum_end <- end_quarter[, list(SumUnits = sum(Units),
                                      SumVolume = sum(Volume)),
                                      by = .(store_code_uc, upc, upc_ver_uc,
                                           groupkey, num_quarter, TotalVolume)]

            PQ_end <- Sum_end[, list(q_bar = SumUnits/4, s_bar = SumVolume/TotalVolume),
                                 by = .(store_code_uc, upc, upc_ver_uc)]
            rm(Sum_end)
			
			# Pick out q4 of t_1 raw data 
            setkey(PQ_end, store_code_uc, upc, upc_ver_uc)
            end_quarter[, c("Units", "Volume", 
                               "groupkey","num_quarter"):= NULL]
            reduced_end <- end_quarter[quarter == 4,]
            rm(end_quarter)
			
			# Merge q4 t_1 raw data with t_1 weights 
            PPT_end <- reduced_end[PQ_end, nomatch = 0]
            # rm(PQ_end, reduced_end)
		
			# Construct sums for q4 t_1 raw data with t_1 weights
            PPT_end[, `:=` (numer = q_bar * Price, gl_numer = s_bar * log(Price),   
                     Price = NULL, s_bar = NULL, q_bar = NULL)]

            P_y_end_dt <- PPT_end[, .(numersum = sum(numer), gl_numersum = sum(gl_numer)),   
                                by = quarter]
								
            rm(PPT_end)

            P_y_end_dt[, `:=` (P_y_lm = numersum, P_y_glm = gl_numersum, numersum = NULL, gl_numersum = NULL)]
            P_y_end <- P_y_end_dt[, P_y_lm]
			P_y_gl_end <- P_y_end_dt[, P_y_glm]
			
       ### start quarter ###
	   # Alternative: 1. For the year end year start link, use only products that appear for 4 quarters in t and in q4 of t_1 (since need the price)
	   # 2. For other links, use products that appear for 4 quarters in t 

			
			# Selection criteria 1: Pick only store-upc's that showed up in q4 to construct year-end value, match with t raw data 
            start_quarter <- end_dec[dt_t, nomatch = 0]
            start_quarter[, c("fips_state_code", "fips_county_code"):= NULL]

			# Selection criteria 2: Further select the set of UPC's that appeared 4 quarters in t 			
			start_quarter <- start_quarter[unique_set, nomatch = 0]

			# Pick out q1 of t raw data 
            # setkey(PQ_end, store_code_uc, upc, upc_ver_uc)
            start_quarter[, c("Units", "Volume", 
                               "groupkey","num_quarter"):= NULL]
            reduced_start <- start_quarter[quarter == 1,]
            rm(start_quarter)
			
			# Merge q1_t raw data with t_1 weights 
            PPT_start <- reduced_start[PQ_end, nomatch = 0]
            rm(PQ_end, reduced_start)
			
			# Construct sums for q1_t raw data with t_1 weights
            PPT_start[, `:=` (numer = q_bar * Price, gl_numer = s_bar * log(Price),    
                     Price = NULL, s_bar = NULL, q_bar = NULL)]

            P_y_start_dt <- PPT_start[, .(numersum = sum(numer), gl_numersum = sum(gl_numer)),    
                                by = quarter]

            rm(PPT_start)

            P_y_start_dt[, `:=` (P_y_lm = numersum, P_y_glm = gl_numersum, numersum = NULL, gl_numersum = NULL)]
            P_y_start <- P_y_start_dt[, P_y_lm]				
			P_y_gl_start <- P_y_start_dt[, P_y_glm]
			
			# OVERWRITE numeric zero's which causes trouble in data tables 
			if(length(P_y_end)==0){P_y_end <- 0L}
			if(length(P_y_start)==0){P_y_start <- 0L}
			if(length(P_y_gl_end)==0){P_y_gl_end <- 0L}
			if(length(P_y_gl_start)==0){P_y_gl_start <- 0L}
			
            output <- list("PI" = P_y, "VUL_TVC" = VUL_TVC, 
                           "PI_end" = P_y_end, "PI_start" = P_y_start,
						   "PI_gl_end" = P_y_gl_end, "PI_gl_start" = P_y_gl_start)

    
        return(output)

}

				  